/*************************************************************************
 MAKE_TRIPS_DATA.SAS

  This program merges R&D activity (disease-year observations, resulting
    from make_rd.sas), mortality (country-disease-year observations, resulting
    from make_mortality.sas), and country-level data on IPRs and income levels
    from various other sources detailed below.
    
    
*************************************************************************/

* Define the local directory in which you are working;
%LET LOCAL = /mnt/Research/RESTAT;
/* HERE is the permanent library rooted at that directory. Later steps read
   HERE.WHO_DEATHS_I and HERE.DISEASE_STARTS from it and write
   HERE.REGRESSION_DATA and HERE.REGRESSION_DATA_LAG to it.
   NOTE(review): the PROC IMPORT steps below name their CSV files without a
   directory, so they do not use this path -- confirm the session working
   directory is where the CSV files live. */
LIBNAME HERE "&LOCAL";

 * World Bank income classifications;
   PROC IMPORT DATAFILE='WB_classifications.csv' OUT=WB_CLASSIFICATIONS DBMS=CSV;
       GETNAMES=YES;
   RUN;

   /* Expand every imported row into one observation per year 1985-2007.
      The income level does not vary by year: it is derived from TYPE1995
      and then overridden for three named countries. */
   DATA WB (KEEP=WB_COUNTRY YEAR INCOME_LEVEL);
       SET WB_CLASSIFICATIONS;
       * Define income level as that in 1995, when WTO was established.;
       DO YEAR = 1985 TO 2007;
           /* Any other TYPE1995 value leaves INCOME_LEVEL untouched (missing). */
           SELECT (TYPE1995);
               WHEN ('H')          INCOME_LEVEL = 4;
               WHEN ('UM')         INCOME_LEVEL = 3;
               WHEN ('LM','LM*')   INCOME_LEVEL = 2;
               WHEN ('L')          INCOME_LEVEL = 1;
               OTHERWISE;
           END;
           /* Country-specific overrides take precedence over TYPE1995. */
           SELECT (WB_COUNTRY);
               WHEN ('Palau')        INCOME_LEVEL = 3;
               WHEN ('San Marino')   INCOME_LEVEL = 4;
               WHEN ('Timor-Leste')  INCOME_LEVEL = 1;
               OTHERWISE;
           END;
           OUTPUT;
       END;
   RUN;

   PROC SORT DATA=WB;
       BY WB_COUNTRY YEAR;
   RUN;

   /* Distribution of income levels, then the countries left unclassified. */
   PROC FREQ DATA=WB;
       TABLES INCOME_LEVEL;
   RUN;
   PROC FREQ DATA=WB (WHERE=(INCOME_LEVEL EQ .));
       TABLES WB_COUNTRY;
   RUN;
   
 * IP measures;
   PROC IMPORT DATAFILE='Country_IPR_data.csv' OUT=COUNTRY_IPR_DATA DBMS=CSV;
       GETNAMES=YES;
   RUN;

   /* Reshape the one-row-per-country IPR table into country-year rows for
      1985-2007. Every OUTPUT below writes one country-year observation. */
   DATA IP (KEEP=WB_COUNTRY WTO_COUNTRY WHO_COUNTRY COUNTRY_CODE IMS_COUNTRY GP_COUNTRY WDI_COUNTRY
                 YEAR WTO_MEMBER TRIPS_COMPLIANT TRIPS_COMPLIANT_EST
                 GP_INDEX GP_PHARM_ENFORCED HAMDEN_INDEX HAMDEN_PHARM WTO_YEAR);
       SET COUNTRY_IPR_DATA;

       /* These three flags stay 0 on all pre-1995 rows; they are only
          recomputed inside the 1995-2007 loop. */
       WTO_MEMBER = 0;
       TRIPS_COMPLIANT = 0;
       TRIPS_COMPLIANT_EST = 0;

     * Convert to country-year observations.;
       ARRAY HAM_INDEX_YR{*} TRIPS1995-TRIPS2007;
       ARRAY HAM_PHARM_YR{*} PH1995-PH2007;

       /* 1985-1994: the 1985 values apply through 1989 and the 1990 values
          through 1994. The Hamden variables are not assigned on these rows. */
       DO YEAR = 1985 TO 1994;
           IF YEAR LE 1989 THEN DO;
               GP_INDEX = GP1985;
               * This uses the chemical/pharma-specific measure and enforcement measure;
               GP_PHARM_ENFORCED = (GP1985_P * (GP1985_E GT .5));
           END;
           ELSE DO;
               GP_INDEX = GP1990;
               GP_PHARM_ENFORCED = (GP1990_P * (GP1990_E GT .5));
           END;
           OUTPUT;
       END;

       /* 1995-2007: membership and compliance flags switch on from the
          recorded year. The explicit NE . test is needed because a missing
          threshold year would otherwise compare as less than YEAR. */
       DO YEAR = 1995 TO 2007;
           WTO_MEMBER          = (WTO_YEAR NE . AND YEAR GE WTO_YEAR);
           TRIPS_COMPLIANT     = (TRIPS_EST_YEAR NE . AND YEAR GE TRIPS_EST_YEAR);
           TRIPS_COMPLIANT_EST = (TRIPS_RESEARCH_YEAR NE . AND YEAR GE TRIPS_RESEARCH_YEAR);

           /* Element 1 of each array is the 1995 column. */
           HAMDEN_INDEX = HAM_INDEX_YR{YEAR - 1994};
           HAMDEN_PHARM = HAM_PHARM_YR{YEAR - 1994};

           /* The 1995, 2000 and 2005 values each apply until the next one. */
           IF YEAR GE 2005 THEN DO;
               GP_INDEX = GP2005;
               GP_PHARM_ENFORCED = (GP2005_P * (GP2005_E GT .5));
           END;
           ELSE IF YEAR GE 2000 THEN DO;
               GP_INDEX = GP2000;
               GP_PHARM_ENFORCED = (GP2000_P * (GP2000_E GT .5));
           END;
           ELSE DO;
               GP_INDEX = GP1995;
               GP_PHARM_ENFORCED = (GP1995_P * (GP1995_E GT .5));
           END;
           OUTPUT;
       END;
   RUN;

   PROC SORT DATA=IP;
       BY WB_COUNTRY YEAR;
   RUN;

   /* Attach the World Bank income level to every country-year IP row and
      derive the combined IP measures IP_MEASURE_MIN and IP_MEASURE_MAX.
      Both inputs must already be sorted BY WB_COUNTRY YEAR. */
   DATA IP;
       MERGE IP WB;
       BY WB_COUNTRY YEAR;

       /* Keep only rows that carry a country code and a World Bank name.
          Rows that exist only in WB have no COUNTRY_CODE and are dropped. */
       IF COUNTRY_CODE NE .;
       IF WB_COUNTRY NE '';

       /* Years before WTO accession: lower-income countries are treated as
          non-compliant, others take the enforced pharma measure floored at 0.
          NOTE(review): a missing INCOME_LEVEL also satisfies LE 2, because
          SAS orders missing below every number -- confirm that is intended. */
       IF TRIPS_COMPLIANT EQ 0 AND YEAR LT WTO_YEAR AND WTO_YEAR NE . THEN DO;
          IF INCOME_LEVEL LE 2 THEN TRIPS_COMPLIANT = 0;
          ELSE TRIPS_COMPLIANT = MAX(0,GP_PHARM_ENFORCED);
       END;

       * If missing TRIPS_COMPLIANT, assign 0. This assumes non-WTO members are not compliant.;
       /* Fix: this statement previously tested and assigned TRIPS_COMPLIANCE,
          a misspelling that created an extra all-zero variable and left
          TRIPS_COMPLIANT untouched. */
       IF TRIPS_COMPLIANT EQ . THEN TRIPS_COMPLIANT = 0;
       * Since Hamden index focuses on low income countries, assign value of TRIPS compliance for richer countries;
       IF HAMDEN_PHARM EQ . THEN HAMDEN_PHARM = TRIPS_COMPLIANT;
       * Final measures that assign the most generous or least generous values of IP protection based on all sources;
       /* MIN and MAX ignore missing arguments, so the result is missing only
          when all three sources are missing. The fallbacks below cover that case. */
       IP_MEASURE_MIN = MIN(GP_PHARM_ENFORCED, HAMDEN_PHARM, TRIPS_COMPLIANT);
       IF IP_MEASURE_MIN EQ . AND INCOME_LEVEL EQ 4 THEN IP_MEASURE_MIN = 1;
       ELSE IF IP_MEASURE_MIN EQ . THEN IP_MEASURE_MIN = 0;
       IP_MEASURE_MAX = MAX(GP_PHARM_ENFORCED, HAMDEN_PHARM, TRIPS_COMPLIANT);
       IF IP_MEASURE_MAX EQ . AND INCOME_LEVEL GE 3 THEN IP_MEASURE_MAX = 1;
       ELSE IF IP_MEASURE_MAX EQ . THEN IP_MEASURE_MAX = 0;
   RUN;
       
       /* Diagnostics on the merged IP table. None of these steps changes IP. */

       /* Frequencies and cross-tabulations for 1990 onward. */
       PROC FREQ DATA=IP (WHERE=(YEAR GE 1990));
           TABLES YEAR
                  INCOME_LEVEL
                  INCOME_LEVEL*(TRIPS_COMPLIANT GP_PHARM_ENFORCED IP_MEASURE_MIN IP_MEASURE_MAX)
                  TRIPS_COMPLIANT*(GP_PHARM_ENFORCED HAMDEN_PHARM);
       RUN;

       /* Where are income level, the minimum measure or the Hamden pharma
          value still missing? */
       PROC FREQ DATA=IP (WHERE=(INCOME_LEVEL EQ .));
           TABLES WB_COUNTRY YEAR*WTO_COUNTRY;
       RUN;
       PROC FREQ DATA=IP (WHERE=(IP_MEASURE_MIN EQ .));
           TABLES YEAR*WTO_COUNTRY;
       RUN;
       PROC FREQ DATA=IP (WHERE=(HAMDEN_PHARM EQ .));
           TABLES YEAR*WTO_COUNTRY;
       RUN;

       /* List the country-years on which the sources disagree. */
       PROC PRINT DATA=IP (WHERE=(TRIPS_COMPLIANT NE HAMDEN_PHARM));
           VAR WB_COUNTRY YEAR TRIPS_COMPLIANT HAMDEN_PHARM;
       RUN;
       PROC PRINT DATA=IP (WHERE=(GP_PHARM_ENFORCED NE . AND TRIPS_COMPLIANT NE GP_PHARM_ENFORCED));
           VAR WB_COUNTRY YEAR TRIPS_COMPLIANT HAMDEN_PHARM GP_PHARM_ENFORCED;
       RUN;

   /* Re-sort on the key used by the merge with the mortality data. */
   PROC SORT DATA=IP;
       BY COUNTRY_CODE YEAR;
   RUN;
   
 * Mortality data, multiple-imputation version;
   /* Builds ICD from HERE.WHO_DEATHS_I: stores a 4-character text form of
      the cause code under the name WHO_CODE, fills missing DEATHS from
      IDEATHS, removes two sets of cause codes, and adds the NEGLECTED and
      NEGLECTED_NONHIV indicators. The result is sorted BY COUNTRY_CODE YEAR. */
   DATA ICD;
       SET HERE.WHO_DEATHS_I;
       /* WHO holds WHO_CODE written with the 4. format. The DROP and RENAME
          statements below make the output dataset carry WHO under the name
          WHO_CODE in place of the variable that was read in. */
       WHO = PUT(WHO_CODE,4.);
       DROP WHO_CODE;
       RENAME WHO = WHO_CODE;
       /* Fall back to IDEATHS when DEATHS is missing (presumably the imputed
          count -- TODO confirm against the program that builds WHO_DEATHS_I). */
       IF DEATHS EQ . THEN DEATHS = IDEATHS;
       /* NOTE(review): DROP and RENAME statements take effect only when the
          observation is written out, so every WHO_CODE reference from here
          to the end of the step resolves to the variable as read from
          HERE.WHO_DEATHS_I, not to WHO. Confirm the type of that input
          variable and that these comparisons against quoted codes behave
          as intended. */
       IF WHO_CODE EQ '.' THEN WHO_CODE = '';
       * Drop broad categories;
       IF WHO_CODE NOT IN('1001','1026','1051','1058','1064','1072','1078','1084','1087');
       * Drop non-drug related causes of death;
       IF WHO_CODE NOT IN('1094','1095','1096','1097','1098','1099','1100','1101','1102','1103');
       /* 0/1 indicators. The NEGLECTED_NONHIV list is the NEGLECTED list
          without code 1020. */
       NEGLECTED = WHO_CODE IN('1020','1021','1005','1006','1074','1059','1004','1002','1023','1022','1025','1017','1065');
       NEGLECTED_NONHIV = WHO_CODE IN('1021','1005','1006','1074','1059','1004','1002','1023','1022','1025','1017','1065');
   /* No DATA= option, so this describes the most recently created dataset (ICD). */
   PROC CONTENTS;
   /* Sort on the key used by the merge with IP. */
   PROC SORT DATA=ICD;
       BY COUNTRY_CODE YEAR;

   /* Attach the country-year IP measures to the mortality rows. Both inputs
      are sorted BY COUNTRY_CODE YEAR before this step. INIP and INICD record
      which input contributed to each retained row. */
   DATA ICD;
       MERGE IP (IN=FROM_IP) ICD (IN=FROM_ICD);
       BY COUNTRY_CODE YEAR;
       /* Discard rows before 1985 (or with no year) and rows with no income level. */
       IF YEAR LT 1985 OR INCOME_LEVEL EQ . THEN DELETE;
       INIP  = FROM_IP;
       INICD = FROM_ICD;
   RUN;

   PROC FREQ DATA=ICD;
       TABLES YEAR
              YEAR*INCOME_LEVEL
              NEGLECTED
              INIP*INICD
              INCOME_LEVEL;
   RUN;

   /* Mortality rows that found no IP match.
      NOTE(review): if INCOME_LEVEL is supplied only by IP, such rows were
      already removed by the filter above and this report is empty -- confirm. */
   PROC FREQ DATA=ICD (WHERE=(INICD AND NOT INIP));
       TABLES COUNTRY_CODE YEAR;
   RUN;

   PROC SORT DATA=ICD;
       BY WHO_CODE YEAR _IMPUTATION_ INCOME_LEVEL;
   RUN;

   /* DEATHS_BY_FLAG sums DEATHS within WHO_CODE x YEAR x _IMPUTATION_ x
      INCOME_LEVEL twice: once over rows where the flag is true, written to
      DEATHS_<stem>1, and once over the remaining rows, written to
      DEATHS_<stem>2. A WHERE on a numeric flag is true only for non-zero,
      non-missing values, so rows with a missing flag land in the second table.
        FLAG - numeric indicator variable on ICD
        STEM - middle part of the output dataset names */
   %MACRO DEATHS_BY_FLAG(FLAG, STEM);
       PROC MEANS DATA=ICD (WHERE=(&FLAG)) NOPRINT;
           ID NEGLECTED NEGLECTED_NONHIV;
           BY WHO_CODE YEAR _IMPUTATION_ INCOME_LEVEL;
           OUTPUT OUT=DEATHS_&STEM.1 SUM(DEATHS)=DEATHS;
       RUN;
       PROC MEANS DATA=ICD (WHERE=(NOT &FLAG)) NOPRINT;
           ID NEGLECTED NEGLECTED_NONHIV;
           BY WHO_CODE YEAR _IMPUTATION_ INCOME_LEVEL;
           OUTPUT OUT=DEATHS_&STEM.2 SUM(DEATHS)=DEATHS;
       RUN;
   %MEND DEATHS_BY_FLAG;

   /* One pair of tables per IP measure, created in this order:
      KM1 KM2 GP1 GP2 H1 H2 IPMIN1 IPMIN2 IPMAX1 IPMAX2. */
   %DEATHS_BY_FLAG(TRIPS_COMPLIANT,   KM)
   %DEATHS_BY_FLAG(GP_PHARM_ENFORCED, GP)
   %DEATHS_BY_FLAG(HAMDEN_PHARM,      H)
   %DEATHS_BY_FLAG(IP_MEASURE_MIN,    IPMIN)
   %DEATHS_BY_FLAG(IP_MEASURE_MAX,    IPMAX)

   /* DEATHS_WHO_INPUTS generates the 40 dataset references for the MERGE
      below: for income levels 4, 3, 2, 1 in that order, each of the ten
      summed-death tables restricted to that level, with DEATHS renamed to
      DEATHS_<label>_<level>. The generated order is
      KM1 KM2 GP1 GP2 H1 H2 IPMIN1 IPMIN2 IPMAX1 IPMAX2 within each level. */
   %MACRO DEATHS_WHO_INPUTS;
       %LOCAL SOURCES LABELS LEVEL K;
       %LET SOURCES = KM1 KM2 GP1 GP2 H1 H2 IPMIN1 IPMIN2 IPMAX1 IPMAX2;
       %LET LABELS  = TRIPS NOTRIPS GP NOGP H NOH IPMIN NOIPMIN IPMAX NOIPMAX;
       %DO LEVEL = 4 %TO 1 %BY -1;
           %DO K = 1 %TO 10;
               DEATHS_%SCAN(&SOURCES,&K) (WHERE=(INCOME_LEVEL EQ &LEVEL) RENAME=(DEATHS=DEATHS_%SCAN(&LABELS,&K)_&LEVEL))
           %END;
       %END;
   %MEND DEATHS_WHO_INPUTS;

   /* One row per WHO_CODE x YEAR x _IMPUTATION_ with 40 death totals side
      by side. INCOME_LEVEL, _TYPE_ and _FREQ_ exist on every input, so the
      values left on DEATHS_WHO come from whichever input is read last for
      the BY group. */
   DATA DEATHS_WHO;
       MERGE %DEATHS_WHO_INPUTS;
       BY WHO_CODE YEAR _IMPUTATION_;
   RUN;

   PROC MEANS DATA=DEATHS_WHO;
   RUN;
       
   /* Sorts the permanent R&D dataset in place so it can be merged below. */
   PROC SORT DATA=HERE.DISEASE_STARTS;
       BY WHO_CODE YEAR;
   RUN;

   /* Regression file: R&D starts joined to same-year death totals, kept for
      1990-2006 and only where both inputs contribute. Missing start counts
      become 0 and every death total is replaced by log(total + 1). */
   DATA HERE.REGRESSION_DATA (DROP=I _FREQ_ _TYPE_);
       MERGE HERE.DISEASE_STARTS (IN=HAS_STARTS) DEATHS_WHO (IN=HAS_DEATHS);
       BY WHO_CODE YEAR;
       IF NOT (HAS_STARTS AND HAS_DEATHS) THEN DELETE;
       IF 1990 LE YEAR LE 2006;

       * Fix missing values;
       ARRAY START_VARS{*} NEW_PHASE1-NEW_PHASE3 TREATMENTS1990;
       DO I = 1 TO DIM(START_VARS);
           IF START_VARS{I} EQ . THEN START_VARS{I} = 0;
       END;

       /* Same 40 variables, in the same order, as produced for DEATHS_WHO. */
       ARRAY DEATH_VARS{*}
           DEATHS_TRIPS_4 DEATHS_NOTRIPS_4 DEATHS_GP_4 DEATHS_NOGP_4 DEATHS_H_4
           DEATHS_NOH_4 DEATHS_IPMIN_4 DEATHS_NOIPMIN_4 DEATHS_IPMAX_4 DEATHS_NOIPMAX_4
           DEATHS_TRIPS_3 DEATHS_NOTRIPS_3 DEATHS_GP_3 DEATHS_NOGP_3 DEATHS_H_3
           DEATHS_NOH_3 DEATHS_IPMIN_3 DEATHS_NOIPMIN_3 DEATHS_IPMAX_3 DEATHS_NOIPMAX_3
           DEATHS_TRIPS_2 DEATHS_NOTRIPS_2 DEATHS_GP_2 DEATHS_NOGP_2 DEATHS_H_2
           DEATHS_NOH_2 DEATHS_IPMIN_2 DEATHS_NOIPMIN_2 DEATHS_IPMAX_2 DEATHS_NOIPMAX_2
           DEATHS_TRIPS_1 DEATHS_NOTRIPS_1 DEATHS_GP_1 DEATHS_NOGP_1 DEATHS_H_1
           DEATHS_NOH_1 DEATHS_IPMIN_1 DEATHS_NOIPMIN_1 DEATHS_IPMAX_1 DEATHS_NOIPMAX_1;
       DO I = 1 TO DIM(DEATH_VARS);
           /* A missing total counts as zero deaths, and log(0 + 1) is 0. */
           IF DEATH_VARS{I} EQ . THEN DEATH_VARS{I} = 0;
           ELSE DEATH_VARS{I} = LOG(DEATH_VARS{I} + 1);
       END;
   RUN;

   PROC CONTENTS DATA=HERE.REGRESSION_DATA;
   RUN;
   PROC MEANS DATA=HERE.REGRESSION_DATA;
   RUN;

* Using lagged (by three years) market size;
   /* Shifting YEAR forward by 3 on the death totals means the merge below
      pairs starts in a given year with deaths recorded three years earlier. */
   DATA DEATHS_WHO_LAG;
      SET DEATHS_WHO;
      YEAR = YEAR + 3;
   RUN;
   PROC SORT DATA=DEATHS_WHO_LAG;
      BY WHO_CODE YEAR;
   RUN;

   /* Lagged regression file: same construction as HERE.REGRESSION_DATA but
      using the shifted death totals. Kept for 1990-2006 and only where both
      inputs contribute. Missing start counts become 0 and every death total
      is replaced by log(total + 1). */
   DATA HERE.REGRESSION_DATA_LAG (DROP=I _FREQ_ _TYPE_);
       MERGE HERE.DISEASE_STARTS (IN=HAS_STARTS) DEATHS_WHO_LAG (IN=HAS_DEATHS);
       BY WHO_CODE YEAR;
       IF NOT (HAS_STARTS AND HAS_DEATHS) THEN DELETE;
       IF 1990 LE YEAR LE 2006;

       * Fix missing values;
       ARRAY START_VARS{*} NEW_PHASE1-NEW_PHASE3 TREATMENTS1990;
       DO I = 1 TO DIM(START_VARS);
           IF START_VARS{I} EQ . THEN START_VARS{I} = 0;
       END;

       /* Same 40 variables, in the same order, as produced for DEATHS_WHO. */
       ARRAY DEATH_VARS{*}
           DEATHS_TRIPS_4 DEATHS_NOTRIPS_4 DEATHS_GP_4 DEATHS_NOGP_4 DEATHS_H_4
           DEATHS_NOH_4 DEATHS_IPMIN_4 DEATHS_NOIPMIN_4 DEATHS_IPMAX_4 DEATHS_NOIPMAX_4
           DEATHS_TRIPS_3 DEATHS_NOTRIPS_3 DEATHS_GP_3 DEATHS_NOGP_3 DEATHS_H_3
           DEATHS_NOH_3 DEATHS_IPMIN_3 DEATHS_NOIPMIN_3 DEATHS_IPMAX_3 DEATHS_NOIPMAX_3
           DEATHS_TRIPS_2 DEATHS_NOTRIPS_2 DEATHS_GP_2 DEATHS_NOGP_2 DEATHS_H_2
           DEATHS_NOH_2 DEATHS_IPMIN_2 DEATHS_NOIPMIN_2 DEATHS_IPMAX_2 DEATHS_NOIPMAX_2
           DEATHS_TRIPS_1 DEATHS_NOTRIPS_1 DEATHS_GP_1 DEATHS_NOGP_1 DEATHS_H_1
           DEATHS_NOH_1 DEATHS_IPMIN_1 DEATHS_NOIPMIN_1 DEATHS_IPMAX_1 DEATHS_NOIPMAX_1;
       DO I = 1 TO DIM(DEATH_VARS);
           /* A missing total counts as zero deaths, and log(0 + 1) is 0. */
           IF DEATH_VARS{I} EQ . THEN DEATH_VARS{I} = 0;
           ELSE DEATH_VARS{I} = LOG(DEATH_VARS{I} + 1);
       END;
   RUN;

   PROC CONTENTS DATA=HERE.REGRESSION_DATA_LAG;
   RUN;
   PROC MEANS DATA=HERE.REGRESSION_DATA_LAG;
   RUN;

ENDSAS;
